# For one continuous variable = Numeric:
# For one discrete variable = Factor:
library(ggplot2)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# alpha, color, fill, linetype, size
# Simulated demo data: 200 "F" rows followed by 200 "M" rows, with weights
# drawn by rnorm() around 55 and 58 respectively (rnorm's default sd).
# The seed is fixed so the printed values below are reproducible.
set.seed(1234)
wdata <- as_tibble(
  data.frame(
    sex = factor(rep(c("F", "M"), each = 200)),
    weight = c(rnorm(200, 55), rnorm(200, 58))
  )
)
wdata
## # A tibble: 400 × 2
## sex weight
## <fctr> <dbl>
## 1 F 53.79293
## 2 F 55.27743
## 3 F 56.08444
## 4 F 52.65430
## 5 F 55.42912
## 6 F 55.50606
## 7 F 54.42526
## 8 F 54.45337
## 9 F 54.43555
## 10 F 54.10996
## # ... with 390 more rows
# Mean weight per sex; used further down to draw one mean line per group.
mu <- wdata %>%
  group_by(sex) %>%
  summarize(grp.mean = mean(weight))
mu
## # A tibble: 2 × 2
## sex grp.mean
## <fctr> <dbl>
## 1 F 54.94224
## 2 M 58.07325
# Base layer shared by the one-variable plots below: weight on the x axis.
a <- ggplot(wdata, aes(x = weight))

# Area plot of binned counts.
# Author's note: a bare `a + geom_area()` does not give the right result
# ("object 'y' not found"); stat = "bin" supplies the count as y.
a + geom_area(stat = "bin", color = "black", fill = "#00AFBB")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# By default the y axis shows the count of weight values. To put the density
# on the y axis instead, map y to the computed `density` variable.
# after_stat() replaces the deprecated `..density..` notation.
a + geom_area(aes(y = after_stat(density)), stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Load the diamonds data set and make sure it is a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data("diamonds")
diamonds <- as_tibble(diamonds)
diamonds
## # A tibble: 53,940 × 10
## carat cut color clarity depth table price x y z
## <dbl> <ord> <ord> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43
## 2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31
## 3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31
## 4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63
## 5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75
## 6 0.24 Very Good J VVS2 62.8 57 336 3.94 3.96 2.48
## 7 0.24 Very Good I VVS1 62.3 57 336 3.95 3.98 2.47
## 8 0.26 Very Good H SI1 61.9 55 337 4.07 4.11 2.53
## 9 0.22 Fair E VS2 65.1 61 337 3.87 3.78 2.49
## 10 0.23 Very Good H VS1 59.4 61 338 4.00 4.05 2.39
## # ... with 53,930 more rows
# Binned price of diamonds, with the fill color mapped to cut.
p <- ggplot(diamonds, aes(x = price, fill = cut))

# Binned bar chart
p +
  geom_bar(stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Binned area chart
p +
  geom_area(stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# alpha, color, fill, linetype, size
# Density plots of weight. Each plot is now its own statement; several were
# fused onto a single line, which does not parse.

# Basic plot
a + geom_density()

# Add color, plus vertical lines at the overall mean and median.
# `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
a +
  geom_density(color = "black", fill = "gray") +
  geom_vline(aes(xintercept = mean(weight)),
             color = "#FC4E08", linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = median(weight)),
             color = "blue", linetype = 4, linewidth = 1)

# Change fill color by group
a + geom_density(aes(fill = sex), alpha = 0.4)

# Add per-group mean lines, colored by sex
a +
  geom_density(aes(fill = sex), alpha = 0.4) +
  geom_vline(data = mu, aes(xintercept = grp.mean, color = sex),
             linetype = "dashed")

# Change colors manually
# Change line colors manually
a2 <- a +
  geom_density(aes(color = sex)) +
  geom_vline(data = mu, aes(xintercept = grp.mean, color = sex),
             linetype = "dashed") +
  theme_minimal()
a2 + scale_color_manual(values = c("#999999", "#E69F00"))
a2 + scale_color_brewer(palette = "Paired")
a2 + scale_color_grey()

# Change fill colors manually
a3 <- a +
  geom_density(aes(fill = sex), alpha = 0.4) +
  theme_minimal()
a3 + scale_fill_manual(values = c("#999999", "#E69F00"))
a3 + scale_fill_brewer(palette = "Dark2")
a3 + scale_fill_grey()

# Position adjustments: identity (position_identity()), stack (position_stack()),
# dodge (position_dodge()); the default value is "stack".
# alpha, color, fill, linetype, size
# Histograms of weight. Each plot is its own statement; two were fused onto
# one line, which does not parse.

# Basic plot
a + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Note: by default stat_bin uses 30 bins, which may not be a good default.
# Change the number of bins (e.g. bins = 50) or the bin width
# (e.g. binwidth = 0.5).
a + geom_histogram(bins = 50)

# Outline/fill colors plus a dashed line at the overall mean.
# `linewidth` replaces `size` for line geoms (ggplot2 >= 3.4.0).
a +
  geom_histogram(bins = 50, color = "black", fill = "grey") +
  geom_vline(aes(xintercept = mean(weight)),
             color = "#FC4E07", linetype = "dashed", linewidth = 1) +
  theme_minimal()

# Density instead of count on the y axis.
# after_stat() replaces the deprecated `..density..` notation.
a + geom_histogram(aes(y = after_stat(density)), bins = 50)

# Change outline color by sex
a +
  geom_histogram(aes(color = sex), fill = "white", bins = 50) +
  theme_minimal()

# Position adjustment "identity" (overlaid)
a + geom_histogram(aes(color = sex), fill = "white", bins = 50,
                   alpha = 0.6, position = "identity")

# Position adjustment "dodge" (interleaved)
# Add mean lines and color by sex: one dashed line per group, taken from `mu`,
# matching the density-plot example. The previous code drew a single overall
# mean line, which contradicted this description.
a +
  geom_histogram(aes(color = sex), fill = "white", alpha = 0.6,
                 position = "dodge", bins = 50) +
  geom_vline(data = mu, aes(xintercept = grp.mean, color = sex),
             linetype = "dashed")

# Change fill and outline colors manually
# Change outline color manually
a +
  geom_histogram(aes(color = sex), fill = "white", alpha = 0.4,
                 position = "identity", bins = 50) +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

# Change fill and outline color manually
# a + geom_histogram(aes(color = sex), fill = "white", alpha =0.4, position = "identity", bins = 50) + scale_fill_manual(values = c("#00AFBB", "#E7B800")) + scale_color_manual(values = c("#00AFBB", "#E7B800"))
# The command above is wrong: fill must be mapped to the group first,
# otherwise scale_fill_manual() has nothing to act on.
a +
  geom_histogram(aes(color = sex, fill = sex), alpha = 0.4,
                 position = "identity", bins = 50) +
  scale_fill_manual(values = c("#00AFBB", "#E7B800")) +
  scale_color_manual(values = c("#00AFBB", "#E7B800"))

## Combine Histogram and Density Plots
# Plot the histogram with density values on the y axis (instead of counts),
# then overlay a semi-transparent density plot.
# Histogram with density plot
a +
  geom_histogram(aes(y = after_stat(density)), color = "black", fill = "white") +
  geom_density(alpha = 0.2, fill = "#FF6666") +
  theme_minimal()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Color by groups
a +
  geom_histogram(aes(y = after_stat(density), color = sex, fill = sex),
                 alpha = 0.4, position = "identity") +
  geom_density(aes(color = sex), linewidth = 1)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# geom_freqpoly(): very close to histogram plots
# alpha, color, linetype, size
# Frequency polygons of weight, followed by a dot plot.

# Basic plot
a + geom_freqpoly(bins = 30) + theme_minimal()

# Change color and linetype by sex
# Use custom color palettes
a +
  geom_freqpoly(aes(color = sex, linetype = sex), bins = 30) +
  scale_color_manual(values = c("#999999", "#E69F00")) +
  theme_minimal()

# Density on the y axis.
# after_stat() replaces the deprecated `..density..` notation.
a +
  geom_freqpoly(aes(y = after_stat(density), color = sex, linetype = sex),
                bins = 30) +
  scale_color_manual(values = c("#999999", "#E69F00")) +
  theme_minimal()

# Dot plot: not suitable for one variable, it's ugly.
a + geom_dotplot(aes(fill = sex))
## `stat_bindot()` using `bins = 30`. Pick better value with `binwidth`.
# Empirical Cumulative Distribution Function (ECDF)
# alpha, color, linetype, size
# ECDF of weight drawn with points, then as a step function. These were two
# statements plus a prose sentence fused onto one line.
a + stat_ecdf(geom = "point")
a + stat_ecdf(geom = "step")

# Quantile-Quantile (Q-Q) plots check whether given data follow a normal
# distribution.
# alpha, color, shape, size
# Load mtcars and convert it to a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data(mtcars)
mtcars <- as_tibble(mtcars)
mtcars
## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## * <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## # ... with 22 more rows
# Convert cyl to a factor; later plots map it to shape, color and size,
# and pair it with manual (discrete) scales.
mtcars <- mtcars %>%
  mutate(cyl = as.factor(cyl))
mtcars
## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## # ... with 22 more rows
# Q-Q plots of mpg. `p` is reassigned here; it previously held the diamonds
# price plot.
p <- ggplot(mtcars, aes(sample = mpg))

# Basic plot
p + stat_qq()

# Change point shapes by groups
# Use custom color palettes
p +
  stat_qq(aes(shape = cyl, color = cyl)) +
  scale_color_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))

# For one discrete variable
# alpha, color, fill, linetype, size
# Load the mpg data set and make sure it is a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data(mpg)
mpg <- as_tibble(mpg)
mpg
## # A tibble: 234 × 11
## manufacturer model displ year cyl trans drv cty hwy
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29
## 3 audi a4 2.0 2008 4 manual(m6) f 20 31
## 4 audi a4 2.0 2008 4 auto(av) f 21 30
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26
## 7 audi a4 3.1 2008 6 auto(av) f 18 27
## 8 audi a4 quattro 1.8 1999 4 manual(m5) 4 18 26
## 9 audi a4 quattro 1.8 1999 4 auto(l5) 4 16 25
## 10 audi a4 quattro 2.0 2008 4 manual(m6) 4 20 28
## # ... with 224 more rows, and 2 more variables: fl <chr>, class <chr>
# Bar plot counting the rows for each value of fl. The heading "geom_point"
# was fused onto the end of this statement and is now a separate comment.
ggplot(mpg, aes(fl)) +
  geom_bar(fill = "steelblue") +
  theme_minimal()

# geom_point
# alpha, color, fill, shape, size
# Data format: print the mtcars tibble (cyl already converted to a factor)
# that the scatter plots below are built from.
mtcars
## # A tibble: 32 × 11
## mpg cyl disp hp drat wt qsec vs am gear carb
## <dbl> <fctr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## 2 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## 3 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## 4 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 5 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 6 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 7 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 8 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 9 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 10 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## # ... with 22 more rows
# Scatter-plot base layer for the two-variable examples.
# x: wt (weight)
# y: mpg (miles/gallon)
b <- ggplot(mtcars, aes(x = wt, y = mpg))

# Basic scatter plot
b + geom_point(color = "#00AFBB")

# Change the point size and shape
b + geom_point(color = "#00AFBB", size = 2, shape = 23)

# Control point size by continuous variable values
# qsec: 1/4 mile time
b + geom_point(aes(size = qsec), color = "#00AFBB")

# Label text.
# `mtcars` was converted to a tibble earlier, so rownames(mtcars) only yields
# "1", "2", ... The car names are taken from the untouched datasets::mtcars,
# whose row order is the same.
b +
  geom_point() +
  geom_text(label = rownames(datasets::mtcars), nudge_y = 0.8)

# Change shape, color, size automatically
# Change point shape by the level of cyl
b + geom_point(aes(shape = cyl))

# Change point shape and colors
b + geom_point(aes(color = cyl, shape = cyl))

# Change shape, color, size manually
# Change the point sizes manually
b +
  geom_point(aes(color = cyl, shape = cyl, size = cyl)) +
  scale_size_manual(values = c(2, 3, 4))

# Change the point shapes and colors manually
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  scale_shape_manual(values = c(3, 16, 17)) +
  scale_color_manual(values = c("#999999", "#E69F00", "#56B4E9"))

# Use brewer color palettes
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

# Use grey scale
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  scale_color_grey() +
  theme_minimal()

#####################################################
## Add regression line or smoothed conditional mean ##
#####################################################
# geom_smooth(), geom_abline()
# alpha, color, fill, shape, linetype, size
# geom_smooth(method = "auto")
# method: loess -> local regression, lm -> linear regression

# Add regression line
# Point + regression line
b + geom_point() + geom_smooth(method = lm)

# Remove the confidence interval
b + geom_point() + geom_smooth(method = lm, se = FALSE)

# Default smoothing method (the author notes loess, local regression fitting)
b + geom_point() + geom_smooth()

# Change the color and shape by groups
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  geom_smooth(aes(color = cyl, fill = cyl), method = lm)

# Remove confidence intervals
# Extend the regression lines: fullrange
b +
  geom_point(aes(color = cyl, shape = cyl)) +
  geom_smooth(aes(color = cyl), method = lm, se = FALSE, fullrange = TRUE)

# Add marginal rugs to a scatter plot
# geom_rug(sides = "bl")
# sides: a string containing any of "trbl" (top, right, bottom, left).
# Add marginal rugs
b + geom_point() + geom_rug()

# Change the color by group
b + geom_point(aes(color = cyl)) + geom_rug(aes(color = cyl))

# Add marginal rugs using faithful data
# Load the faithful data set and convert it to a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data(faithful)
faithful <- as_tibble(faithful)
faithful
## # A tibble: 272 × 2
## eruptions waiting
## * <dbl> <dbl>
## 1 3.600 79
## 2 1.800 54
## 3 3.333 74
## 4 2.283 62
## 5 4.533 85
## 6 2.883 55
## 7 4.700 88
## 8 3.600 85
## 9 1.950 51
## 10 4.350 85
## # ... with 262 more rows
# Scatter plot of the faithful data with marginal rugs.
ggplot(faithful, aes(x = eruptions, y = waiting)) +
  geom_point() +
  geom_rug()

# Jitter points to reduce overplotting
# geom_jitter(), position_jitter()
# alpha, color, fill, shape, size
# Use mpg data. `p` is reassigned here for the displ vs. hwy examples.
p <- ggplot(mpg, aes(displ, hwy))

# Default scatter plot
p + geom_point()

# Use jitter to reduce overplotting
p + geom_jitter(position = position_jitter(width = 0.5, height = 0.5))

# Show the rows with displ equal to 1.9, highest hwy first. This was fused
# onto the plot statement above. near() avoids an exact `==` comparison on
# doubles.
select(mpg, displ, hwy) %>%
  arrange(desc(hwy)) %>%
  filter(near(displ, 1.9))
## # A tibble: 3 × 2
## displ hwy
## <dbl> <int>
## 1 1.9 44
## 2 1.9 44
## 3 1.9 41
##
# Text annotation
# geom_text()
# label, alpha, angle, color, family, fontface, hjust, lineheight, size, vjust
# `mtcars` is a tibble at this point, so rownames(mtcars) would only give
# "1", "2", ... The car names come from datasets::mtcars (same row order).
b + geom_text(aes(label = rownames(datasets::mtcars)), size = 3)

# Base layer for the 2D-binning examples: price against carat.
# This assignment was fused onto the end of the plot statement above.
# NOTE: the variable `c` shares its name with base::c(); calls such as
# c(1, 1000) still resolve to the function.
c <- ggplot(diamonds, aes(carat, price))
# Add heatmap of 2d bin counts
# geom_bin2d() produces a scatter plot with rectangular bins.
# stat_bin_2d(), stat_summary_2d()
# xmax, xmin, ymax, ymin, alpha, color, fill, linetype, size
c + geom_bin2d()

# Change the number of bins
c + geom_bin2d(bins = 15)

# Specify the width of bins
c + geom_bin2d(binwidth = c(1, 1000))

# The same through the stat functions. These were fused onto one line.
c + stat_bin_2d()
c + stat_summary_2d(aes(z = depth))

# Add hexagon binning
# geom_hex()
# stat_bin_hex(), stat_summary_hex()
# alpha, color, fill, size
# library() errors immediately if hexbin is missing, whereas require() only
# returns FALSE.
library(hexbin)
c + geom_hex()

# Change the number of bins
c + geom_hex(bins = 10)

# The same through the stat functions. These were fused onto one line.
c + stat_bin_hex()
c + stat_summary_hex(aes(z = depth))

# 2D density estimation
# Functions: geom_density_2d(), stat_density_2d()
# Arguments: alpha, color, linetype, size
# Base scatter layer for the 2D density examples (faithful: eruptions vs. waiting)
sp <- ggplot(faithful, aes(x = eruptions, y = waiting))

# Preview the two plotted columns
faithful %>%
  select(eruptions, waiting)
## # A tibble: 272 × 2
## eruptions waiting
## * <dbl> <dbl>
## 1 3.600 79
## 2 1.800 54
## 3 3.333 74
## 4 2.283 62
## 5 4.533 85
## 6 2.883 55
## 7 4.700 88
## 8 3.600 85
## 9 1.950 51
## 10 4.350 85
## # ... with 262 more rows
# 2D density contours of the faithful data.

# Default plot
sp + geom_density_2d(color = "#E7B800")

# Add points
sp +
  geom_point(color = "#00AFBB") +
  geom_density_2d(color = "#E7B800")

# Use stat_density_2d with geom = "polygon".
# after_stat() replaces the deprecated `..level..` notation.
sp +
  geom_point() +
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon")

# Change the gradient color
sp +
  geom_point() +
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon") +
  scale_fill_gradient(low = "#00AFBB", high = "#FC3E07")

# geom_jitter
# alpha, color, fill, shape, size
# Jitter plot of two discrete variables (cut vs. color), colored by cut.
ggplot(diamonds, aes(cut, color)) +
  geom_jitter(aes(color = cut), size = 0.5)

# Preview the two plotted columns. This was fused onto the plot statement
# above, which does not parse.
select(diamonds, cut, color)
## # A tibble: 53,940 × 2
## cut color
## <ord> <ord>
## 1 Ideal E
## 2 Premium E
## 3 Good E
## 4 Premium I
## 5 Good J
## 6 Very Good J
## 7 Very Good I
## 8 Very Good H
## 9 Fair E
## 10 Very Good H
## # ... with 53,930 more rows
# Load ToothGrowth, turn dose into a factor and convert to a tibble.
# as_tibble() replaces the deprecated as_data_frame().
data("ToothGrowth")
ToothGrowth$dose <- as.factor(ToothGrowth$dose)
ToothGrowth <- as_tibble(ToothGrowth)
ToothGrowth
## # A tibble: 60 × 3
## len supp dose
## <dbl> <fctr> <fctr>
## 1 4.2 VC 0.5
## 2 11.5 VC 0.5
## 3 7.3 VC 0.5
## 4 5.8 VC 0.5
## 5 6.4 VC 0.5
## 6 10.0 VC 0.5
## 7 11.2 VC 0.5
## 8 11.2 VC 0.5
## 9 5.2 VC 0.5
## 10 7.0 VC 0.5
## # ... with 50 more rows
# Base layer for the box plots: len by dose. The argument list below was
# fused onto this assignment, which does not parse.
e <- ggplot(ToothGrowth, aes(x = dose, y = len))
# alpha, color, linetype, shape, size, fill

# Basic box plot
e + geom_boxplot()

# Rotate the box plot
e + geom_boxplot() + coord_flip()

# Notched box plot
e + geom_boxplot(notch = TRUE)

# Box plot with mean points.
# `fun` replaces the deprecated `fun.y` argument of stat_summary().
e +
  geom_boxplot() +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 4, color = "blue")